* This do-file reads in raw datasets on federal, state, and local minimum wage policy and employer minimum wage policy from NELP (supplemented and checked by Bryce Springfield) and saves a clean version of the data (all_mw.dta).
* Author: Ellora Derenoncourt

* Outputs:
/*
*1. Monthly state, locality, and employer minimum wage dataset. 
	filepath + name: data/minimum_wages/all_mw.dta
*/

*1. Monthly state, locality, and employer minimum wage dataset. 

	* Read in employer voluntary minimum wage data, sheet on employer characteristics
	import excel using "$data/employer_vmw/raw/employer_voluntary_minimum_wages.xlsx", clear first sheet(employer)

	* Remove duplicate Walmart entry
	drop if employer_name=="Walmart"
	
	* Clean errors in employer key variable
	drop if emp_key==218
	replace emp_key=34 if regexm(employer_name,"Fifth Third")
	replace emp_key=35 if regexm(employer_name,"First Horizon")
	drop if emp_key==270
	drop if emp_key==29
	
	* Enter employment info for Moo Cluck Moo from https://www.freep.com/story/news/local/michigan/wayne/2016/10/13/canton-closing-not-demise-moo-cluck-moo/91994672/
	replace n_employees_global=12 if regexm(employer_name, "Moo Cluck Moo")
	replace n_employees_us=12 if regexm(employer_name, "Moo Cluck Moo")
	
	* Remove duplicate entries with no additional information
	quietly bysort employer_name :  gen dup = cond(_N==1,0,_n)
	tab dup
	keep if dup<=1
	drop dup
	
	* Rename employer name var for checking
	rename employer_name employer_name_merge
	
	tempfile employer_chars
	save `employer_chars'

	* Read in data on employer policies
	import excel using "$data/employer_vmw/raw/employer_voluntary_minimum_wages.xlsx", clear first
	
	* Clean errors in employer key variable
	replace emp_key=271 if regexm(employer_name, "Reagan")
	replace emp_key=113 if emp_key==114 & regexm(employer_name, "Truck")
	replace emp_key=93 if emp_key==92 & regexm(employer_name, "Baptist")
	
	* Drop minimum wages that were collectively bargained with unions
	drop if cba==1
	
	* Convert minimum wage variable to numeric after dropping non-numeric entries (min dollar raise amount--just one observation)
	gen byte notnumeric = real(min_mw)==.
	drop if notnumeric==1
	drop notnumeric
	destring min_mw, replace
	
	* Drop annual salary minimum wage (just one observation)
	drop if min_mw>100

	* Drop Target starting pay range
	drop if max_mw=="24" & regexm(employer_name, "Target")
	
	* Drop percent raise for workers (just one observation)
	drop if min_mw<1 
	
	* Keep policies that cover all workers (true company-wide minimum)
	keep if coverage_worker=="all"
	replace coverage_geo=lower(coverage_geo)
	
	* Destring max minimum wage variable
	destring max_mw, replace

	* Generate midpoint of minimum and maximum minimum wages
	gen mean_mw=(min_mw+max_mw)/2
	replace mean_mw=min_mw if max_mw==. // Use minimum for average minimum wage if no max mw available

	* Clean policy start date variable
	replace start=lower(start)
	replace start=subinstr(start,"-","",.)
	replace start= substr(start,1,length(start)-2)+"20"+substr(start,length(start)-1,.)

	* Create monthly date variable from announcement date
	gen monthly_date=mofd(announce_date)
	gen year=yofd(announce_date) // Create year variable
	format monthly_date %tm // Format monthly date variable

	* Create monthly date variable from start date
	gen date_start = date(start_date, "DMY")
	gen monthly_date_start=mofd(date_start)
	gen year_start=yofd(date_start) // Create year variable
	format date_start %td // Format date variable
	format monthly_date_start %tm // Format monthly date variable

	* Generate employer id code
	encode employer_name,gen(employer_id)

	* If policy row is a duplicate, remove NELP version
	quietly bysort emp_key monthly_date:  gen dup = cond(_N==1,0,_n)
	drop if NELP==1 & dup>0
	drop dup
	
	* If two policies are announced on the same date, use the start date as the month for the second policy
	quietly bysort employer_id monthly_date:  gen dup = cond(_N==1,0,_n)
	replace monthly_date = monthly_date_start if dup>0
	drop dup
	
	* Confirm no more duplicates
	quietly bysort employer_id monthly_date:  gen dup = cond(_N==1,0,_n)
	tab dup 
	drop dup 

	* Set as panel data
	xtset employer_id monthly_date
	drop if monthly_date==.
	drop if year<2000 /// These are policies with unclear dates 

	preserve
	* Bring in employer-level characteristics data
	merge m:1 emp_key using `employer_chars'
	gen main_employer=(_merge==3) //
	*br if _merge==1
	drop if _merge==1 // Two obs dropping: United Services Automobile Association & University of Tennessee Health Sciences Center -- need to confirm no policy
	drop _merge
	save "$data/employer_vmw/clean_employer_voluntary_minimum_wages.dta", replace // Save clean employer vmw file
	restore
	
	* Fill panel with previous min wage until policy change
	tsfill, full
	replace mean_mw=mean_mw[_n-1] if mean_mw==. & employer_id==employer_id[_n-1]

	* Re-set as panel data and restrict to time frome of state and local MW data
	xtset employer_id monthly_date
	keep if inrange(year,2010,2023)
	keep if monthly_date<=755

	* Bring in employer-level characteristics data
	merge m:1 emp_key using `employer_chars'
	gen main_employer=(_merge==3) //
	drop _merge

	* Save clean employer mw dataset as a tempfile
	tempfile employer_mw
	save `employer_mw'
	
	* Read in substate minimum wage data from Vaghul and Zipperer (https://github.com/benzipperer/historicalminwage)
	use $data/minimum_wages/raw/mw_substate_stata/mw_substate_monthly.dta, clear
	gen year=yofd(dofm(monthly_date)) // Create year variable
	keep if inrange(year,2010,2023) // Restrict to same time frame as employer data

	* Create indicator for policy change and save only policy changes
	sort statefips monthly_date
	gen change=(mean_mw!=mean_mw[_n-1] & statefips==statefips[_n-1])
	keep if change==1

	* Keep only substate policies that exceed their state minima
	keep if abovestate==1 

	* Generate locality identifier and set as panel data
	egen locality_id=group(locality statename)
	sort locality_id monthly_date
	xtset locality_id monthly_date

	* Fill panel with previous min wage until policy change
	tsfill, full
	replace mean_mw=mean_mw[_n-1] if mean_mw==. & locality_id==locality_id[_n-1]

	* Re-set as panel data
	xtset locality_id monthly_date

	* Save clean locality mw dataset as a tempfile
	tempfile local_mw
	save `local_mw'

	* Read in state level minimum wage data from Vaghul and Zipperer (https://github.com/benzipperer/historicalminwage)
	use $data/minimum_wages/raw/mw_state_stata/mw_state_monthly.dta, clear
	gen year=yofd(dofm(monthly_date)) // Create year variable
	keep if inrange(year,2010,2023) // Restrict to same time frame as employer data

	* Create indicator for policy change and save only policy changes
	sort statefips monthly_date
	gen change=(mean_mw!=mean_mw[_n-1] & statefips==statefips[_n-1])
	keep if change==1

	* Keep only state policies that exceed the federal MW
	drop if mean_mw<=min_fed

	* Set as panel data
	sort statefips monthly_date
	 xtset statefips monthly_date
	 
	* Fill panel with previous min wage until policy change
	tsfill, full
	replace mean_mw=mean_mw[_n-1] if mean_mw==. & statefips==statefips[_n-1]
	replace min_fed_mw=min_fed_mw[_n-1] if min_fed_mw==. 

	* Re-set as panel data
	xtset statefips monthly_date

	* Append local MW and employer MW datasets
	append using `local_mw'
	append using `employer_mw'

	* Save final clean dataset of all minimum wage policies
	save $data/minimum_wages/all_mw.dta, replace
